********************************************************************************
* Supplement to TableA5_survreadm.do
* 
* This script builds the survival and readmission outcomes from 2001-2014 
* 
* Notes: 
* 
* the outcomes are
* s___ - survival
* r___ - readmission
		
* the control specifications are
* _hdx - historical dx adjusted
* _ars - age/race/sex adjusted
* _nra - no risk adjustment
* _cnt - contemporaneous dx adjusted
* _cal - historical + contemporaneous dx adjusted
********************************************************************************		
		


* generate processed datasets of events
* create mortality measures
* create patient counts
set more off
capture log close



* raw data in
* /homes/nber/shruthi-dua51934/sacarny-DUA51934/shruthi-dua51934/cohort_extraction_new/V07.00/master/1cohort_extraction/indx


set matsize 1000

* set to 0001 or 05 or 20 or 100 to choose sample to work from
* NOTE: the hip and knee (hk)  .0001 sample does not have corresponding medpar file 
local sampsize = "100"

* which conditions?
* options: ami chf hip pnu hk
global CONDITIONS "chf hip pnu hk stk"
*global CONDITIONS "ami chf hip pnu hk stk"

* process the PN to AHA crosswalk? 
global PROCXWALK = 0

* make the event data?
global MAKEEVENTS = 0

* make risk-adjusted survival for the long horizon (whole sample)?
global MAKESURVREAD = 1

* seed for tiebreaks
global EVENTSSEED = 789531



* UPDATED ON 10/12/22
* starting yidx for survival/readmission estimates
* set up:
*         -2 = 2001-2003
*	  -1 = 2004-2006
*          0 = 2008-2010
*          1 = 2012-2014 
*
* earliest available is currently -2 (2001-2003)
global MINYIDX = -2

* ending yidx for survival/readmission estimates
* latest available is currently 1 (2012-2014)
global MAXYIDX = 1

* which control specs to use?
* options: hdx ars nra cnt cal
global CONTROLSLIST "hdx ars nra"


* which outcomes to estimate?
* options: s (survival) r (readmisson)
global OUTCOMESLIST "s r"

* lower limit on number of patients when estimating risk-adjusted rates
if ("`sampsize'"!="100") {
	global MINPATS = 5
}
else {
	global MINPATS = 25
}

* make the patient counts?
global MAKECOUNTS = 1


* bring together everything?
* note: not required , did not run this 
global BRINGTOGETHER = 0

* risk adjusters (historical)
global RISKADJUSTERS "ARSINDIC_* como_*"

* risk adjusters (contemporaneous)
global CNTMRISKADJUSTERS "ARSINDIC_* cntm_*"

* risk adjusters (all)
global CALLRISKADJUSTERS "ARSINDIC_* como_* cntm_*"

* risk adjusters (age/race/sex only)                                   
global ARSRISKADJUSTERS "ARSINDIC_*"


* initialize file paths 
local fpath_proj = "/homes/nber/shruthi-dua51934/sacarny-DUA51934/shruthi-dua51934/replication_files"
local fpath_xwalk = "`fpath_proj'/build/crosswalks"
local fpath_ahadat = "`fpath_proj'/build/aha/output"
local fpath_cohortext =  "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/cohort_extraction_new/V07.00/master"
local fpath_survreadm = "/disk/agedisk4/medicare.work/sacarny-DUA51934/sacarny-dua51934/survreadm"
local fpath_log = "`fpath_proj'/Revisions/log_files"

* set working directory 
cd "`fpath_survreadm'"



log using "`fpath_log'/ebfail_extend_survreadm.log", replace 
set more off
clear

adopath ++ PLUS
adopath ++ ./ado/


* make the AHA ID to PN crosswalk (based on Maggie's work)
if ($PROCXWALK) {

	* set up the aha id to pn xw to be used later 
	*use "`fpath_xwalk'/aha_pn_xwalk.dta", clear 
	* set up the aha id to pn xw to be used later 
	use "pn2ahaidxw/aha_pn_xwalkv12.dta", clear 
	
	* expand the xw 
	expand 17

	* gen year sequence 
	bys id pn: egen year = seq(), from(1999) to(2015) block(1)

	* FIX ME: this xw doens't account for pns associated with 
	* multiple  AHA IDs in the same year 
	* maybe a good place to bring in the other xw 
	duplicates drop pn year , force
	*rename id_aha id 
	tostring id, replace 
	save "`fpath_xwalk'/aha_pn_xwalk_expanded.dta", replace 
		
	* bring in the AHA to zip code id 
	use "`fpath_ahadat'/aha_zip.dta", clear
	merge 1:m id year using "`fpath_xwalk'/aha_pn_xwalk_expanded.dta", keep(match)  nogen 
	
	isid pn year
	drop mcrnum 
	
	replace mloczip = string(real(substr(mloczip, 1, 5)), "%05.0f")
	
	destring(mloczip), replace
	
	format mloczip %05.0f

	* account for providers changing zip codes over time
	* assign the first observed zip code as the permanent zip code 
	sort pn year 
	bys pn: replace mloczip = mloczip[1] 
	tostring pn, replace 
	save "`fpath_xwalk'/aha_pn_xwalk_allyears.dta", replace 
}	
 


* make the data
if ($MAKEEVENTS) {

	set seed $EVENTSSEED
		
	local c = 1
	foreach cond in $CONDITIONS {
		display "***** WORKING ON `cond' *****"

		* bring in index events
		use diag_id bene_id diag_year bene_zipclmindx medrawindx fileyear ///
			surv_30 age_grp male race ///
			using "`fpath_survreadm'/src/1cohort_extraction/indx/`cond'`sampsize'_indxdemo.dta"
		isid diag_id
	
 	
		* deal with race
		gen byte nonwhite = race!="1"
		drop race

		* zip of beneficiary
		rename bene_zipclmindx zip_bene
		destring zip_bene, replace
		rename diag_year year

		* bring in provider of index event, discharge date, and source of admission
		merge 1:1 diag_id medrawindx fileyear ///
			using "`fpath_cohortext'/1cohort_extraction/medpar/`cond'`sampsize'_medparclms.dta", ///
			keep(master match) ///
			keepusing( ///
				prvnumgrp er_amt is_shorthosp is_cah dschrgdt admsndt dstntncd dgnscd* ///
			) ///
			generate(match_provider)
		rename prvnumgrp pn
		
		count if match_provider==1
		local unmatched = r(N)
		if (`unmatched'>0) {
			display "***** COULDN'T MATCH `unmatched' INDEX EVENTS TO INDEX CLAIM! ******"
			display "dropping..."
		}
		drop if match_provider==1
		drop match_provider
		
		* generate risk-adjusters using only contemporaneous claims
		* FIXME: only looking at secondary dx. seems ok, but could be an issue
		* for procedure code identified cohorts (e.g. hip/knee replacement)
		* 
		* the next lines replace diagnoses codes with comorbidity 
		* 
		drop dgnscd1
		foreach dx of varlist dgnscd* {
			rename `dx' dgnscd
			merge m:1 dgnscd using "`fpath_survreadm'/comorbid/dx2como.dta", ///
				update keep(master match match_update match_conflict) nogenerate
			drop dgnscd
		}
		
		foreach var of varlist card_* como_* {
			local newvar = regexr("`var'","^(como|card)_","cntm_")
			rename `var' `newvar'
			replace `newvar' = 0 if `newvar'==.
		}
		
		* for hip/knee only: 
		* drop people receiving both hip and knee replacement
		* generate "comorbidity" indicators for categories:
		* hip replacement
		* knee replacement
		if ("`cond'"=="hk") {
			merge 1:1 diag_id medrawindx fileyear ///
				using "`fpath_survreadm'/src/1cohort_extraction/medpar/`cond'`sampsize'_medparclms.dta", ///
				keep(master match) assert(match using) ///
				keepusing( ///
					prcdrcd* ///
				) nogenerate

			gen byte como_hip_any = 0
			gen byte como_knee_any = 0
			
			foreach var of varlist prcdrcd* {
				replace como_hip_any = como_hip_any | `var'=="8151"
				replace como_knee_any = como_knee_any | `var'=="8154"
			}		
			drop prcdrcd*
			
			display "number receiving both hip and knee replacement (dropping):"
			count if como_hip_any & como_knee_any
			drop if como_hip_any & como_knee_any
			
			* categories mutually exclusive and exhaustive
			assert como_hip_any + como_knee_any == 1
			
			* knee replacement only to be omitted category
			drop como_knee_any
			rename como_hip_any como_hiprpl
			label var como_hiprpl "patient received a hip replacement procedure"
		}
		
		* for stroke patients only: 
		* keep only the patients who had an ischemic vs hemorrhagic stroke 
		* generate indicators for ischemic vs hemorrhagic stroke 
		if ("`cond'" == "stk") {
			merge 1:1 diag_id medrawindx fileyear ///
			using "`fpath_survreadm'/src/1cohort_extraction/medpar/`cond'`sampsize'_medparclms.dta", ///
			keep(master match) assert(match using) ///
			keepusing( ///
			dgnscd* ///
			) nogenerate
	
		gen byte como_stk_isc = 0
		gen byte como_stk_hem = 0
		
		foreach var of varlist dgnscd* {
			replace como_stk_isc = como_stk_isc == 1 | `var' == "43301" | `var' == "43310" | ///
								   `var' == "43311" | `var' == "43321" | `var' == "43331" | ///
								   `var' == "43381" | `var' == "43391" | `var' == "43400" | ///
								   `var' == "43401" | `var' == "43411" | `var' == "43491" | ///
								   `var' == "436"  
								   
			replace como_stk_hem = como_stk_hem == 1 | `var' == "430" | `var' == "431" 
		}

		drop dgnscd* 
		
		display "number of patients with both ischemic and hemmorhagic stroke (dropping):"
		count if como_stk_isc & como_stk_hem 
	
		display "number of patients with neither ischemic and hemorrhagic stroke (dropping):"
		count if !como_stk_isc & !como_stk_hem
		
		drop if como_stk_isc & como_stk_hem
		drop if !como_stk_isc & !como_stk_hem

		* check categories are mutually exclusive & exhaustive 
		assert como_stk_isc + como_stk_hem == 1 
		
		* hemmorhagic stroke is the ommitted category 
		drop como_stk_hem 
		label var como_stk_isc "1 if patient had an ischemic stroke" 		
		}
		
		
		* drop the raw observation number and file year 
		drop medrawindx fileyear
		
		* limit to short term and critical access hospitals
		keep if is_shorthosp | is_cah
		drop is_shorthosp is_cah
		
		* indicator for transfers to other sources of inpatient non-rehab/non-nursing care
		gen byte transferred_code = ///
			dstntncd=="02" | dstntncd=="05" | dstntncd=="43" | dstntncd=="62" | dstntncd=="66"
		* indicator for discharged against medical advice or died in hospital
		gen byte amadeath = dstntncd=="07" | dstntncd=="20"
		drop dstntncd
		
		* generate thru ed indicator using ER charges
		gen byte ed = er_amt!=. & er_amt>0
		gen byte noned = !ed
		drop er_amt

		* merge provider to POS to get indicator for non-US state
		merge m:1 pn year using "`fpath_survreadm'/pos/pos.dta", ///
			keepusing(nonstate nonstate typ_control shortterm cah beds_tot) ///
			keep(master match) generate(match_pos)
		
		* merge to POS for pn's that for some reason were not included in the POS in that year
		* merge to the last mention of the POS
		merge m:1 pn using "`fpath_survreadm'/pos/pos_lastyear.dta", ///
			keepusing(nonstate) ///
			keep(master match match_update match_conflict) ///
			generate(match_pos_lastyear) update
		
		display "Observations that did not match to provider of service file (dropping):"
		count if match_pos_lastyear==1
		drop if match_pos_lastyear==1
			
	
		* get rid of patients treated outside the US
		display "Observations at hospitals outside US states:"
		count if nonstate==1
		drop if nonstate==1
		drop nonstate
		
		* rebase to the ids used in the analysis 
		* this also brings in zipcode 		
		merge m:1 pn year using "`fpath_xwalk'/aha_pn_xwalk_allyears.dta", ///
			keep(master match) generate(match_xwlk) 	
		
		display "Observations that did not match the pn-aha ID xw"
		count if match_xwlk == 1 
		drop if match_xwlk == 1 
		drop match_xwlk 
	
		  	
		* bring in the hrr of the provider using the zip2hsahrr xw 
		rename mloczip zip 
		merge m:1 zip year using "`fpath_survreadm'/zip2hsahrr/zip2hsahrr.dta", ///
			keepusing(hrrnum hsanum) ///
			keep(master match) generate(match_id_hrr)
	
	
	
		* merge to last year zipcode was mentioned in zipcode-hrr map for zipcodes that for
		* some reason were not listed in that year's map 
		merge m:1 zip using "`fpath_survreadm'/zip2hsahrr/zip2hsahrr_lastyear.dta", ///
		keepusing(hrrnum hsanum) ///
		keep(master match match_update match_conflict) ///
		generate(match_id_hrr_lastyear) update

		display "Observations for which aha ID couldn't be matched to HRR/HSA (dropping):"	
		count if match_id_hrr_lastyear==1
		drop if match_id_hrr_lastyear==1
		drop match_id_hrr match_id_hrr_lastyear

		rename zip zip_pn
		rename hrrnum hrrnum_pn
		rename hsanum hsanum_pn
		

		/*  
		* rebase to pn_new identifier
		merge m:1 pn using dartmouth_xwlk/pn2pn_new.dta, ///
			keep(master match) generate(match_dxwlk)
		
		display "Observations that did not match to dartmouth crosswalk to pn_new (dropping):"
		count if match_dxwlk==1
		drop if match_dxwlk==1
		drop match_dxwlk
		
		* bring in zip, hrr, hsa of provider (pn_new)
		merge m:1 pn_new using dartmouth_xwlk/pn_new.dta, ///
			assert(match using) keep(match) nogenerate keepusing(zip hrrnum hsanum)
		
		
		* index with regular pn's! *
		* FIXME: i should use a different file for this!! * 
		* bring in zip, hrr, hsa of provider (pn_new)
		merge m:1 pn using dartmouth_xwlk/pn2pn_new_extended.dta, ///
			keep(master match) generate(match_dxwlk) keepusing(zip hrrnum hsanum)
		display "Observations that did not match to pos location file:"
		count if match_dxwlk==1
		drop if match_dxwlk==1
		drop match_dxwlk
						
		rename zip zip_pn
		rename hrrnum hrrnum_pn
		rename hsanum hsanum_pn
		

		* switch provider identifier
		rename pn pn_orig
		rename pn_new pn
		*/  
		 
		* bring in HRR/HSA of patient
		rename zip_bene zip
		
		* merge to zip-HRR/HSA map for the actual year of the patient
		merge m:1 zip year using "`fpath_survreadm'/zip2hsahrr/zip2hsahrr.dta", ///
			keepusing(hrrnum hsanum) ///
			keep(master match) generate(match_bene_hrr)
		
		* merge to last year zipcode was mentioned in zipcode-hrr map for zipcodes that for
		* some reason were not listed in that year's map
		merge m:1 zip using "`fpath_survreadm'/zip2hsahrr/zip2hsahrr_lastyear.dta", ///
			keepusing(hrrnum hsanum) ///
			keep(master match match_update match_conflict) ///
			generate(match_bene_hrr_lastyear) update
	
		display "Observations for which bene couldn't be matched to HRR/HSA (dropping):"
		count if match_bene_hrr_lastyear==1
		drop if match_bene_hrr_lastyear==1
		drop match_bene_hrr match_bene_hrr_lastyear

		rename zip zip_bene
		rename hrrnum hrrnum_bene
		rename hsanum hsanum_bene
		
		* limit to non-federal short-term, cah 
		gen flag_ok =  typ_control!=5 & typ_control!=10 & (shortterm==1|cah==1) ///
		if match_pos==3
		replace flag_ok = 0 if flag_ok == . 

		display "Observations that are not in for-profit hospitals (dropping):" 
		count if flag_ok 
		keep if flag_ok
		drop flag_ok typ_control shortterm match_pos match_pos_lastyear 
		

		* bring in risk adjusters
		merge 1:1 diag_id using "`fpath_survreadm'/src/2create_index_level_measures/last_yr_comorbid/`cond'`sampsize'_medpar_cc.dta", assert(match using) keep(match) nogenerate keepusing( como_* card_*)
	
		* note: the card_ami* variables are based on icd-9 codes whose reporting changes
		* etc. by maurice's recommendation we should just use card_mi. so let's drop
		* the card_ami* vars.
		drop card_ami*
		
		* rename the card_* vars to como_
		foreach var of varlist card_* {
			local newvar = regexr("`var'","^card_","como_")
			rename `var' `newvar'
		}
		 
		* for the ami cohort only, drop como_mi 
		di "Condition is `cond'" 
		if "`cond'" == "ami" { 
			drop como_mi 
		}
				
		* indicator for any hospital admission within 30 days post-discharge
		* spending and summed drg weights for 30 day window starting with admission date
		
		tempfile readmspend
		
		* need to join to medpar claims
		preserve
		
		keep diag_id dschrgdt admsndt
		rename dschrgdt dschrgdt_index
		rename admsndt admsndt_index
		
		joinby diag_id using "`fpath_survreadm'/src/1cohort_extraction/medpar/`cond'`sampsize'_medparclms.dta", unmatched(none)
		
		keep diag_id dschrgdt_index admsndt_index ///
			admsndt dschrgdt flagindx ///
			is_shorthosp is_cah ///
			drg_cd drgprice outlramt
		
		* limit to short term hospitals / critical access hospitals
		keep if is_shorthosp | is_cah
		
		* transfers have admissions the day of, or day following, index discharge
		gen byte transfer = ///
			!flagindx & ///
			(admsndt >= dschrgdt_index) & ///
			(admsndt - dschrgdt_index <= 1)
		
		* readmissions are admissions that are not transfers, and occur w/in 30 days
		gen byte readmission = ///
			!flagindx & ///
			(admsndt >= dschrgdt_index) & ///
			(admsndt - dschrgdt_index > 1) & (admsndt - dschrgdt_index <= 30)
		
		* raw $ spending
		gen spending = drgprice + outlramt
		drop drgprice outlramt
		
		* real spending
		
		* bring in drg weights
		rename drg_cd drg
		destring drg, replace
		gen fyear = year(dofq( qofd(dschrgdt)+1 ))
		merge m:1 drg fyear using "`fpath_survreadm'/drg_productivity/drg_productivity.dta", ///
			keep(master match) generate(match_drgweight)
		tab drg fyear if match_drgweight==1
		if (r(N) != 0) {
			display "*** SOME DRGS COULD NOT BE MATCHED TO WEIGHTS ***"
			display "Number of observations that failed to match:"
			display r(N)
		}
		
		drop match_drgweight
		
		* drg of the index event
		gen drg_index = drg if flagindx
		* weight of the index event
		gen wt_index = drgweight if flagindx
		* fyear of the index event
		gen fyear_index = fyear if flagindx
		drop drg fyear
		
		* now we'll make 30 day spending measures
		* 30 day window begins with admission date and ends with admission date + 30
		
		* suppose people are admitted at the very beginning of the admisison day
		* and people are discharged at the very end of the discharge date --
		* we set discharge day to 12am on discharge day + 1
		
		rename admsndt_index window_start
		gen window_end = window_start + 30
		
		gen span_in_window = ///
			min(max(window_start,dschrgdt+1),window_end) - ///
			min(max(window_start,admsndt),window_end)

		* a hospital stay that begins before the index stay but ends on the day of the
		* index admission will have that last day count in the share. let's knock that
		* day out
		
		* in fact let's knock out all hospital stays that begin before the index
		* admission
		replace span_in_window = 0 if admsndt < window_start
		
		* share of days spent in 30 day window
		gen span_total = dschrgdt+1 - admsndt
		gen share_in_window = span_in_window/span_total
		
		* deflate spending/resources by the share
		replace spending = spending*share_in_window
		replace drgweight = drgweight*share_in_window
		gen wt_index_defl = wt_index*share_in_window
		
		* down to the index event level
		collapse ///
			(max) transfer readmission flagindx ///
			(sum) spending drgweight ///
			(mean) drg_index wt_index wt_index_defl fyear_index, by(diag_id)
		
		assert flagindx==1
		drop flagindx
		
		* save the list
		save `readmspend'
		
		* back to data
		restore
		
		* merge in list of diag_id with transfers/readmissions, spending, drg weights
		merge 1:1 diag_id using `readmspend', assert(match)
		
		rename readmission read_30
		rename transfer transferred_claims
		drop dschrgdt admsndt
		
		rename spending spend_30
		rename drgweight drgwt_30
		
 
		
		* now save data together...
		compress
		save "`fpath_survreadm'/processed_events/`cond'`sampsize'.dta", replace
	
		clear
		local c = `c' + 1
		
	}

}

if ($MAKESURVREAD) {
	foreach cond in $CONDITIONS {

		display "***** WORKING ON `cond' *****"
	
		use "`fpath_survreadm'/processed_events/`cond'`sampsize'.dta", clear 
	
		* create the "in sample" indicator
		gen byte insample = 1
	
		* year index groups years into triples
		* EDIT ON 10/12/22 : adding 2001-03 as the earliest time period 
		* -2 = 2001-2003
		* -1 = 2004-2006
		* 0 = 2008-2010
		* 1 = 2012-2014
		gen yidx = -2 if inrange(year, 2001, 2003)
		replace yidx = -1 if inrange(year, 2004, 2006)
		replace yidx = 0 if inrange(year, 2008, 2010) 
		replace yidx = 1 if inrange(year, 2012, 2014) 
		
		tab yidx
	
		* limit sample to 2004-2014 (the years we have AHA data for) 
		replace insample = 0 if yidx<$MINYIDX | yidx>$MAXYIDX | missing(yidx)
	
		* generate the hospital-cross-yearindex group variable
		* the fixed effects will be at this level		
		* whatever you do, DON'T merge on this between files!!! the same number will
		* represent different hospitals
		egen pnXyidx = group(pn yidx)
		xtset pnXyidx
	
		* with the synthetic pn's from the dartmouth xwalk, each pn now has a fixed
		* location. so no pn should ever move hrr/hsa. sanity check...
		**** remove hsa from here ****
		/*
		foreach area in hrr hsa {
			egen min`area' = min(`area'num_pn) if insample, by(pnXyidx)
			egen max`area' = max(`area'num_pn) if insample, by(pnXyidx)
			assert min`area'==max`area'
			drop min`area' max`area'
		}
		*/ 
	
		* make age/race/sex interactions
		egen arsgroup = group(age_grp nonwhite male)
		* make age/race/sex group indicators
		qui tab arsgroup , gen(ARSINDIC_)
		* omitted category
		drop ARSINDIC_1

		* the grand sample will only include patients with a valid DRG weight and
		* hospital payment
		* this greatly simplifies calculating productivity
		replace insample = 0 if spend_30==.|spend_30==0|drgwt_30==.|drgwt_30==0
		
		* reduce to the grand sample set... survival and readmission samples
		* will be a subset of this
		keep if insample

		* make sure insamp is set to 0 if there are any missing obs		
		markout insample surv_30 read_30 $RISKADJUSTERS

		* impose the minimum patient count

		* patients in the pn-yidx
		egen sr_npats = sum(insample), by(pnXyidx)

		qui distinct pnXyidx if sr_npats >= 1
		local totpn = r(ndistinct)
		qui distinct pnXyidx if sr_npats >= $MINPATS
		local qualpn = r(ndistinct)
		local droppedpn = `totpn' - `qualpn'

		display "Total hospital-yidx: `totpn'"
		display "Hospital-yidx with >= $MINPATS: `qualpn' (Dropping `droppedpn')"

		* sample restriction: must have min number of patients

		replace insample = (sr_npats >= $MINPATS) & insample
		replace sr_npats = 0 if sr_npats < $MINPATS

		* now make the risk-adjusted estimates
		
		xtset pnXyidx

		* the outcomes are:
		* s___ - survival
		* r___ - readmission
		
		* the control specifications are
		* _hdx - historical dx adjusted
		* _ars - age/race/sex adjusted
		* _nra - no risk adjustment
		* _cnt - contemporaneous dx adjusted
		* _cal - historical + contemporaneous dx adjusted
		
		* variables we'll keep post-collapse
		local collapse_str ""

		*For the 5% file to run, comment out for 100% file
		drop como_hyperhd


		* iterate over survival and readmission
		foreach outcome in $OUTCOMESLIST {
		
			if ("`outcome'"=="s") {
				local lhs "surv_30"
			}
			else if ("`outcome'"=="r") {
				local lhs "read_30"
			}
			else {
				exit
			}

			* mean survival/readmission rate in subsample
			di "OUTCOME IS `outcome'" 
			egen `outcome'_`cond'_mean = mean(`lhs') if insample
			
			local collapse_str "`collapse_str' `outcome'_`cond'_mean"
			
			* iterate over control specs
			foreach control in $CONTROLSLIST {
			
				local meas "`outcome'`control'"
				
				display "measure: `meas'"
	
				* risk adjusters for the measures
				if ("`control'"=="ars") {
					* age/race/sex only
					local riskadjusters "$ARSRISKADJUSTERS"
				}
				else if ("`control'"=="nra") {
					* no risk adjusters
		
					* weird bug: xtreg with no rhs variables throws up an error
					* need to make this variable (which is then dropped by xtreg!)
					local riskadjusters "ones"
					gen byte ones = 1
				}
				else if ("`control'"=="cnt") {
					* contemporaneous
					local riskadjusters "$CNTMRISKADJUSTERS"
				}
				else if ("`control'"=="cal") {
					* historical + contemporaneous
					local riskadjusters "$CALLRISKADJUSTERS"
				}
				else if ("`control'"=="hdx") {
					* historical risk adjusters
					local riskadjusters "$RISKADJUSTERS"
				}
				else {
					exit
				}
			
				* now run the FE regression for each yidx
				tempvar fe se ehat
				gen `meas'_`cond'_fe = .
				gen `meas'_`cond'_se = .
				
				local collapse_str "`collapse_str' `meas'_`cond'_fe `meas'_`cond'_se"
				
				forvalues yidx_cur = $MINYIDX/$MAXYIDX {
			
					di "`yidx_cur'" 
					qui count if yidx == `yidx_cur'
					di "In yidx = `yidx_cur': `r(N)'" 
					qui count if insample 
					di "In sample: `r(N)'" 
					count if insample & yidx==`yidx_cur'
					di "In sample and yidx = `yidx_cur' : `r(N)'"

					* run the regression!
					xtreg `lhs' `riskadjusters' ///
						if insample & yidx==`yidx_cur', ///
						fe vce(cluster hrrnum_pn)
					* regression sample should equal the insample indicator
					assert e(sample) == (insample & yidx==`yidx_cur')

					capture mkdir estimates/`meas'
					estimates save estimates/`meas'/`cond'`sampsize'_yidx`yidx_cur'.ster, replace

					* Extract the fixed effects, which are the risk-adjusted rates
					predict `fe' if insample & yidx==`yidx_cur', u

					* Copy into the full FE variable
					* Note the regression was run with a constant term so we must
					* add it to the FE
					replace `meas'_`cond'_fe = `fe' + _b[_cons] ///
						if insample & yidx==`yidx_cur'
					drop `fe'
				
					* Now deal with SEs			
					if ("`control'"=="nra") {
						* fese requires a RHS variable but non-risk adjusted
						* measure has no RHS variables.
						* we'll just make the standard errors ourselves...
						replace `meas'_`cond'_se = ///
							e(sigma_e)*sqrt(1/sr_npats) ///
							if insample & yidx==`yidx_cur'
					}
					else {		
						di "LHS is `lhs'" 
						di "Riskadjusters: `riskadjusters'" 
						di "se: `se'" 
						di "ehat: `ehat'" 				
						* extract the error term fits, which speeds up fese
						predict `ehat' if insample & yidx==`yidx_cur', e
						* since we have risk-adjusters  we can use fese
						fese_adam `lhs' `riskadjusters' ///
							if insample & yidx==`yidx_cur', ///
							ehat(`ehat') homo(`se')
						* copy into the full SE variable
						replace `meas'_`cond'_se = `se' ///
							if insample & yidx==`yidx_cur'
						drop `se' `ehat'
						}
				}			
			
				if ("`control'"=="nra") {
					* weird bug: xtreg with no rhs variables throws up an error so
					* we had to make a variable of ones (which was then dropped by xtreg!)
					* we can now get rid of it
					drop ones
					local riskadjusters ""
				}
			
			}
		}

		rename sr_npats sr_`cond'_npats

		collapse (mean) `collapse_str' ///
			, by( pn yidx hrrnum_pn sr_`cond'_npats )

		isid pn yidx

		* now label things and perform the EB adjustment
		
		* track variables for ordering later
		local order_str ""
		
		* iterate over survival and readmission
		foreach outcome in $OUTCOMESLIST {
			di "The OUTCOME is `outcome'" 
			/*
			if ("`outcome'"=="s") {
				local outcome "survival"
			}
			else if ("`outcome'"=="r") {
				local outcome "readmission"
			}
			else {
				exit
			}
			*/ 
			
			label var `outcome'_`cond'_mean "`cond' `outcome' / aggregate 30 day rate"	
			local order_str "`order_str' `outcome'_`cond'_mean"
		
			* iterate over control specs
			foreach control in $CONTROLSLIST {
			
				local meas "`outcome'`control'"

				if ("`control'"=="ars") {
					local control "age/race/sex"
				}
				else if ("`control'"=="nra") {
					local control "no risk adj"
				}
				else if ("`control'"=="cnt") {
					local control "contemporaneous dx"
				}
				else if ("`control'"=="cal") {
					local control "historical + contemporaneous dx"
				}
				else if ("`control'"=="hdx") {
					local control "historical dx"
				}
				else {
					exit
				}
				
				label var `meas'_`cond'_fe "`cond' `outcome' `control' / raw FE"
				label var `meas'_`cond'_se "`cond' `outcome' `control' / SE of FE"
				
				local order_str "`order_str' `meas'_`cond'_fe `meas'_`cond'_se"

				* perform univariate EB adjustment
				display "performing univariate EB adjustment for `meas'"
				capture noisily ebayes `meas'_`cond'_fe `meas'_`cond'_se ///
					if `meas'_`cond'_fe!=., ///
					absorb(hrrnum_pn) by(yidx) ///
					gen(`meas'_`cond'_eb) ///
					var(`meas'_`cond'_var) uvar(`meas'_`cond'_uvar)
				
				** NOTE 
				** As of 10/21/22: something weird happens for rhdx measures in yidx==1
				** The EB procedure always fails to converge - why? 
				** Working around this by assigning the raw values - need to check this 
				** 
				if (_rc!=0)  {
					if ("`sampsize'"=="100") {
					
					*  in the full sample this is a problem
					display "EB procedure failed to converge "
					error 430 
					
					* for now just copy in the raw values
					gen `meas'_`cond'_eb = `meas'_`cond'_fe ///
					if `meas'_`cond'_fe!=.
					qui summ `meas'_`cond'_fe if `meas'_`cond'_fe!=.
					replace `meas'_`cond'_var = r(Var) if `meas'_`cond'_fe!=.
					replace `meas'_`cond'_uvar = r(Var) if `meas'_`cond'_fe!=.
					}
					else {
						* in smaller samples, no big deal
						* copy in the raw values
						display "continuing because it's < 100% sample"
						gen `meas'_`cond'_eb = `meas'_`cond'_fe ///
							if `meas'_`cond'_fe!=.
						qui summ `meas'_`cond'_fe if `meas'_`cond'_fe!=.
						replace `meas'_`cond'_var = r(Var) if `meas'_`cond'_fe!=.
						replace `meas'_`cond'_uvar = r(Var) if `meas'_`cond'_fe!=.
					}
				}
	
				label var `meas'_`cond'_uvar "`cond' `outcome' `control' / underlying var"
				label var `meas'_`cond'_var "`cond' `outcome' `control' / underlying var (w/in-HRR-yidx)"
				label var `meas'_`cond'_eb "`cond' `outcome' `control' / EB-adj FE"
				local order_str "`order_str' `meas'_`cond'_uvar `meas'_`cond'_var"
				local order_str "`order_str' `meas'_`cond'_eb"
				
			}

		}

		label var hrrnum_pn "actual provider HRR"

		label var pn "actual provider number"
	
		label var yidx "year index (1=2008-2010)"

		order yidx pn hrrnum_pn sr_`cond'_npats `order_str'
			
		sort pn hrrnum_pn yidx
		save "`fpath_survreadm'/surv_read/`cond'`sampsize'_01-14.dta", replace
	
		clear		
	}
}



if ($MAKECOUNTS) {

	foreach cond in $CONDITIONS {
		display "***** WORKING ON `cond' *****"
				
		* now generate counts for patients
		use pn year using processed_events/`cond'`sampsize'_01-14.dta, clear
		
		gen npatients_`cond'_tot = 1
	
		* down to the pn-year
		collapse (sum) npatients_*, by(pn year) fast
				
		* for the actual counts variables, a missing value is tantamount to a zero
		foreach var of varlist npatients_`cond'_* {
			replace `var' = 0 if `var'==.
		}
				
		label var npatients_`cond'_tot "`cond' patients - total patients"
		
		compress
		sort pn year
		save counts/rev_`cond'`sampsize'.dta, replace
		
		clear
	}
}










****
****
**** NOT RUN 
if ($BRINGTOGETHER) {
	clear
	
	gen pn = ""
	gen year = .
	
	label var pn "provider number (dartmouth synthetic)"
	label var year "year"
	
	* bring in counts
	foreach cond in $CONDITIONS {
		merge 1:1 pn year using counts/rev_`cond'`sampsize'.dta, nogenerate
	}
	
	* bring in survival, readmission, productivity, spending, resources
	
	* yidx==1 corresponds to 2006-2008, we use it as a 2008 measure

	gen yidx = 1 if year==2008
	
	* yidx==-3 <=> 1994-1996, use as 1996 measure
	* yidx==-2 <=> 1997-1999, use as 1999 measure
	* yidx==-1 <=> 2000-2002, use as 2002 measure
	* yidx==0 <=> 2003-2005, use as 2005 measure
	
	replace yidx = -3 if year==1996
	replace yidx = -2 if year==1999
	replace yidx = -1 if year==2002
	replace yidx = 0 if year==2005
	
	label variable yidx "year index"
		
	* if a hospital had a survival or readmission but no associated count its year field
	* is now set to missing.

	* for yidx==1, let's make it 2008
	replace year=2008 if yidx==1 & year==.

	* yidx==-3 <=> 1994-1996, use as 1996 measure
	replace year=1996 if yidx==-3 & year==.
	* yidx==-2 <=> 1997-1999, use as 1999 measure
	replace year=1999 if yidx==-2 & year==.
	* yidx==-1 <=> 2000-2002, use as 2002 measure
	replace year=2002 if yidx==-1 & year==.
	* yidx==0 <=> 2003-2005, use as 2005 measure
	replace year=2005 if yidx==0 & year==.
	
	* if a PN had no patients and reported no quality measures in a year, then an obs for
	* that pn-year does not exist in the dataframe. but it should exist with a count set
	* to zero
	
	fillin pn year
	drop _fillin
	
	* now we'll merge in provider location info and then replace the missing counts
	* with zeroes

	* now deal with provider location variables

	* merge provider to dartmouth data to get zip/HRR/HSA
	merge m:1 pn using dartmouth_xwlk/pn2pn_new_extended.dta, nogenerate ///
		keepusing(zip hrrnum hsanum) ///
		assert(match using) keep(match)
	
	rename hrrnum hrrnum_pn
	rename hsanum hsanum_pn
	
	* now deal with missing values in the npatients and nclosest variables
	* need to do this here 'cause the POC, US news, hcahps data files may have
	* added observations to the dataset, or we may have added observations from the
	* fillin command above
	
	* for the actual counts variables, a missing value is tantamount to a zero
	foreach var of varlist npatients_*_* {
		replace `var' = 0 if `var'==.
	}
	
	* fill in the yidx variables too, for hosps that lacked a survival/readmission
	* but had entries in hospital compare etc.
	* yidx==-3 <=> 1996
	replace yidx = -3 if year==1996
	* yidx==-2 <=> 1999
	replace yidx = -2 if year==1999
	* yidx==-1 <=> 2002
	replace yidx = -1 if year==2002
	* yidx==0 <=> 2005
	replace yidx = 0 if year==2005
	* yidx==1 <=> 2008
	replace yidx = 1 if year==2008
	
	gen pn_num = real(pn)
	assert pn_num!=.
	
	sort pn year
	xtset pn_num year
	
	* surv/read sample indicator (for year indices spanning 1996-2008)
	foreach cond in $CONDITIONS {
		gen byte samp_`cond'_tot = ///
			yidx!=. & year>=1996 & npatients_`cond'_tot >= 1 & npatients_`cond'_tot != .
		label var samp_`cond'_tot "`cond' in sample indicator - tot patients"
	}

	* xtset the data
	sort year pn
	xtset pn_num year
			
	foreach cond in $CONDITIONS {
	
		* bring in longsurv effects
		merge m:1 pn yidx ///
			using surv_read/`cond'`sampsize'_01-14.dta, nogenerate ///
			assert(master match)

	}
	
	sort pn year
	save counts_allmeas/rev_counts_allmeas`sampsize'.dta, replace
	clear
	
}


capture log close
